import re
import requests

# Send the request and fetch the response data
def get_html(url='http://www.weather.com.cn/weather/101010100.shtml', timeout=10):
    """Download a weather page and return its HTML decoded as UTF-8.

    Args:
        url: Page to fetch. Defaults to the forecast page this module
            was written against.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status.
    """
    # Send a browser-like User-Agent so the site's anti-crawler check
    # does not reject the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
    }

    resp = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error page to the parser.
    resp.raise_for_status()
    # Force the decoding used for resp.text rather than relying on the guessed charset.
    resp.encoding = 'utf-8'
    return resp.text

# Extract and process the required data
def parse_html(html_str: str) -> list:
    """Extract the name / weather / wd / zs span values from page HTML.

    Each field is collected with its own regular expression over the whole
    document, then the four result lists are combined position by position.

    Args:
        html_str: HTML text to scan.

    Returns:
        A list of ``[name, weather, wd, zs]`` lists. The result is only as
        long as the shortest of the four match lists, and is empty when
        any field has no match.
    """
    city = re.findall(r'<span class="name">([\u4e00-\u9fa5]*)</span>', html_str)
    weather = re.findall(r'<span class="weather">([\u4e00-\u9fa5]*)</span>', html_str)
    # Non-greedy: stop at the first closing tag. A greedy ".*" runs to the
    # LAST "</span>" on the line and swallows any markup that follows when
    # several spans share a line.
    wd = re.findall(r'<span class="wd">(.*?)</span>', html_str)
    zs = re.findall(r'<span class="zs">([\u4e00-\u9fa5]*)</span>', html_str)

    return [list(row) for row in zip(city, weather, wd, zs)]
